home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Format CD 52
/
Amiga Format AFCD52 (Issue 136, May 2000).iso
/
-serious-
/
programming
/
c
/
icu-1.3.1
/
icuapps
/
uconv
/
uconv.cpp
< prev
next >
Wrap
C/C++ Source or Header
|
2000-02-23
|
8KB
|
292 lines
//
// uconv demonstration example of ICU and codepage conversion
// Purpose is to be a similar tool as the UNIX iconv program.
// Shows the usage of the ICU classes: UnicodeConverterCPP, UnicodeString
//
// Usage: uconv [flag] [file]
// -f [codeset] Convert file from this codeset
// -t [codeset] Convert file to this code set
// -l Display all available converters
// If no file is given, uconv tries to read from stdin
//
// To compile: c++ -o uconv -I${ICUHOME}/include -Wall -g uconv.cpp -L${ICUHOME}/lib -licu-uc -licu-i18n
//
// Original contributor was Jonas Utterström <jonas.utterstrom@vittran.norrnod.se> in 1999
// Permission is granted to use, copy, modify, and distribute this software
//
#include <stdio.h>
#include <errno.h>
// This is the UnicodeConverterCPP headerfile
#include <convert.h>
// This is the UnicodeString headerfile
#include <unistr.h>
static const size_t buffsize = 4096;
// Print all available codepage converters
void printAllConverters()
{
UErrorCode err = U_ZERO_ERROR;
int32_t num;
size_t numprint = 0;
static const size_t maxline = 70;
// getAvailable returns a string-table with all available codepages
const char* const* convtable = UnicodeConverterCPP::getAvailableNames(num, err);
if (U_FAILURE(err))
{
fprintf(stderr, "getAvailableNames failed\n");
return;
}
for (int32_t i = 0; i<num-1; i++)
{
// ucnv_getAvailableName gets the codepage name at a specific
// index
numprint += printf("%-20s", convtable[i]);
if (numprint>maxline)
{
putchar('\n');
numprint = 0;
}
}
puts(convtable[num-1]);
}
// Convert a file from one encoding to another
bool convertFile(const char* fromcpage,
const char* tocpage,
FILE* infile,
FILE* outfile)
{
bool ret = true;
UnicodeConverterCPP* convfrom = 0;
UnicodeConverterCPP* convto = 0;
UErrorCode err = U_ZERO_ERROR;
bool flush;
const char* cbuffiter;
char* buffiter;
const size_t readsize = buffsize-1;
char* buff = 0;
const UChar* cuniiter;
UChar* uniiter;
UChar* unibuff;
size_t rd, totbuffsize;
// Create codepage converter. If the codepage or its aliases weren't
// available, it returns NULL and a failure code
convfrom = new UnicodeConverterCPP(fromcpage, err);
if (U_FAILURE(err))
{
fprintf(stderr, "Unknown codepage: %s\n", fromcpage);
goto error_exit;
}
convto = new UnicodeConverterCPP(tocpage, err);
if (U_FAILURE(err))
{
fprintf(stderr, "Unknown codepage %s\n", tocpage);
goto error_exit;
}
// To ensure that the buffer always is of enough size, we
// must take the worst case scenario, that is the character in the codepage
// that uses the most bytes and multiply it against the buffsize
totbuffsize = buffsize*convto->getMaxBytesPerChar();
buff = new char[totbuffsize];
unibuff = new UChar[buffsize];
do
{
rd = fread(buff, 1, readsize, infile);
if (ferror(infile) != 0)
{
fprintf(stderr, "Error reading from input file: %s\n", strerror(errno));
goto error_exit;
}
// Convert the read buffer into the new coding
// After the call 'uniiter' will be placed on the last character that was converted
// in the 'unibuff'.
// Also the 'cbuffiter' is positioned on the last converted character.
// At the last conversion in the file, flush should be set to true so that
// we get all characters converted
//
// The converter must be flushed at the end of conversion so that characters
// on hold also will be written
uniiter = unibuff;
cbuffiter = buff;
flush = rd!=readsize;
convfrom->toUnicode(uniiter, uniiter+buffsize, cbuffiter, cbuffiter+rd,
NULL, flush, err);
if (U_FAILURE(err))
{
fprintf(stderr, "Conversion to Unicode from codepage failed\n");
goto error_exit;
}
// At the last conversion, the converted characters should be equal to number
// of chars read.
if (flush && cbuffiter!=(buff+rd))
{
fprintf(stderr, "Premature end of input, when converting from codepage to Unicode\n");
goto error_exit;
}
// Convert the Unicode buffer into the destination codepage
// Again 'buffiter' will be placed on the last converted character
// And 'cuniiter' will be placed on the last converted unicode character
// At the last conversion flush should be set to true to ensure that
// all characters left get converted
buffiter = buff;
cuniiter = unibuff;
convto->fromUnicode(buffiter, buffiter+totbuffsize,
cuniiter, cuniiter+(size_t)(uniiter-unibuff),
NULL, flush, err);
if (U_FAILURE(err))
{
fprintf(stderr, "Problem converting from Unicode to codepage\n");
goto error_exit;
}
// At the last conversion, the converted characters should be equal to number
// of consumed characters.
if (flush && cuniiter!=(unibuff+(size_t)(uniiter-unibuff)))
{
fprintf(stderr, "Premature end of Unicode conversion to codepage\n");
goto error_exit;
}
// Finally, write the converted buffer to the output file
rd = (size_t)(buffiter-buff);
if (fwrite(buff, 1, rd, outfile) != rd)
{
fprintf(stderr, "The converted text couldn't be written: %s \n", strerror(errno));
goto error_exit;
}
} while (!flush); // Stop when we have flushed the converters (this means that it's the end of output)
goto normal_exit;
error_exit:
ret = true;
normal_exit:
delete convfrom;
delete convto;
// Close the created converters
delete [] buff;
delete [] unibuff;
return ret;
}
void printUsage()
{
printf("Usage: uconv [flag] [file]\n"
"-f [codeset] Convert file from this codeset\n"
"-t [codeset] Convert file to this code set\n"
"-h Show this help text\n"
"-l List all available codepages\n");
}
int main(int argc, char** argv)
{
FILE* file = 0;
FILE* infile;
int ret = 0;
const char* fromcpage = 0;
const char* tocpage = 0;
const char* infilestr = 0;
char** iter = argv+1;
char** end = argv+argc;
// First, get the arguments from command-line
// to know the codepages to convert between
for (; iter!=end; iter++)
{
// Check for from charset
if (strcmp("-f", *iter) == 0)
{
iter++;
if (iter!=end)
fromcpage = *iter;
}
else if (strcmp("-t", *iter) == 0)
{
iter++;
if (iter!=end)
tocpage = *iter;
}
else if (strcmp("-l", *iter) == 0)
{
printAllConverters();
goto normal_exit;
}
else if (strcmp("-h", *iter) == 0)
{
printUsage();
goto normal_exit;
}
else
{
infilestr = *iter;
}
}
if (fromcpage==0 && tocpage==0)
{
printUsage();
goto normal_exit;
}
if (fromcpage==0)
{
fprintf(stderr, "No conversion from codeset given (use -f)\n");
goto error_exit;
}
if (tocpage==0)
{
fprintf(stderr, "No conversion to codeset given (use -t)\n");
goto error_exit;
}
// Open the correct input file or connect to stdin for reading input
if (infilestr!=0)
{
file = fopen(infilestr, "r");
if (file==0)
{
fprintf(stderr, "Couldn't open the input file: %s\n", strerror(errno));
return 1;
}
infile = file;
}
else
infile = stdin;
if (!convertFile(fromcpage, tocpage, infile, stdout))
goto error_exit;
goto normal_exit;
error_exit:
ret = 1;
normal_exit:
if (file!=0)
fclose(file);
return ret;
}